# Immigration and the top 1%

# Proportion of foreigners (migrants) in each percentile of the income distribution

## pick year
y <- 2017

# # # #

## load data

# Read one extract from `path`, keeping only the four columns named below and
# parsing each of them as double. Both sources share this column spec.
read_extract <- function(path) {
  read_csv(
    path,
    col_types = cols_only(
      ti = col_double(),
      migrant_comb = col_double(),
      weight = col_double(),
      emp_inc = col_double()
    )
  )
}

# sa: file for the chosen year
file_name <- paste0('datalab_data_folder/SA', y, '.csv.gz')
sa <- read_extract(file_name)

# paye: file for the chosen year
file_name <- paste0('datalab_data_folder/paye', y, '.csv.gz')
paye <- read_extract(file_name)

# # # #

## append
# Stack the two sources, give rows with a missing weight a weight of zero,
# and order the rows from highest to lowest ti.
data <- bind_rows(sa, paye) %>%
  mutate(weight = replace(weight, is.na(weight), 0)) %>%
  arrange(desc(ti))

# the per-source tables are no longer needed once combined
rm(sa, paye)

# # # #

## percentiles: based on size of percentile determined by ONS population estimates

# NOTE(review): psize is not defined in the part of the script visible here.
# It is assumed to be the number of people in one percentile (from the ONS
# population estimates) and to be set before this section runs -- confirm.

# number of complete percentiles covered by our data: weighted population
# divided by the percentile size, rounded down
t <- floor(sum(data$weight) / psize)

# Rank the rows once, highest ti first, and accumulate the weighted
# population. This does not depend on the percentile, so it is done outside
# the map instead of re-sorting the full data for every bin.
ranked <- data %>%
  arrange(desc(ti)) %>%
  mutate(cumpop = cumsum(weight))

# One summary row per percentile bin x, where x = 1 is the highest-income bin.
# seq_len() gives an empty sequence when t is 0, whereas 1:t would give c(1, 0).
tab <- future_map_dfr(seq_len(t), function(x) {

  # Bin x holds the rows whose cumulative weight lies in
  # (psize * (x - 1), psize * x]. Consecutive bins share their boundary, so no
  # row falls between two bins; bounds of the form [lower + 1, upper) would
  # skip every row whose cumulative weight is exactly psize * x or lies less
  # than 1 above it.
  pop_above <- psize * (x - 1)
  pop_through <- psize * x

  ranked %>%
    filter(cumpop > pop_above & cumpop <= pop_through) %>%
    summarise(
      # weighted sum of migrant_comb (presumably a 0/1 migrant flag -- confirm)
      migrants = sum(migrant_comb * weight),
      # highest and lowest ti among the rows in the bin
      upper_inc = max(ti),
      lower_inc = min(ti),
      # weighted population in the bin and the weighted migrant share
      n = sum(weight),
      prop = migrants / n,
      # x counts bins from the top, so the highest-income bin is labelled 99
      pctile_from_top = 100 - x,
      # unweighted counts: rows in the bin, and the plain sum of migrant_comb
      ucount = dplyr::n(),
      ucount_migrants = sum(migrant_comb)
    )
})

# thresholds
# lowest and highest ti of each percentile bin, keyed by the percentile label
thresh <- select(tab, pctile_ons = pctile_from_top, lower_inc, upper_inc)


## graph
# migrant share against percentile; the y axis is fixed to the range 0-0.25
ggplot(tab, aes(x = pctile_from_top, y = prop)) +
  geom_point() +
  scale_y_continuous(limits = c(0, 0.25))

# # # #

# save output
# keep only the columns that are written out, renaming the percentile label
tab <- select(tab, pctile = pctile_from_top, prop, n, ucount)
# highest percentile first
tab <- arrange(tab, desc(pctile))
# tag every row with the year chosen at the top of the script
tab <- mutate(tab, tax_year = y)
write_csv(tab, 'output/migrants_cross_section.csv')
